# -*- coding: UTF-8 -*-
import urllib
import urllib2
import cookielib
from HTMLParser import HTMLParser
import re
import xlrd
import xlwt
import string
#=============================================================================

class fdcinfo(object):
    """Record of the basic details scraped for one project.

    Every field defaults to the empty string at class level; getfdcinfo
    fills them in on an instance by plain attribute assignment.
    """

    objname = ""      # project name
    companyname = ""  # developer company name
    number = ""       # id the project was looked up with
    pos = ""          # project location




#=============================================================================
class myHtmlParser(HTMLParser):
    """HTML parser that collects anchor hrefs and/or non-blank text chunks.

    Collection is switched on by flags the caller sets after construction:

    * ``irstart == 1`` -- handle_starttag appends the ``href`` value of
      every ``<a>`` tag to ``result``.
    * ``irend == 1`` -- handle_data appends every stripped, non-empty text
      chunk to ``data``.

    NOTE: ``result`` and ``data`` are class attributes. Unless a caller
    rebinds them on an instance, every parser appends to the same two
    lists, so their contents accumulate across instances.
    """

    irstart = 0       # 1 -> record <a href="..."> values
    irend = 0         # 1 -> record text chunks
    result = []       # recorded hrefs (class-level list, see NOTE above)
    data = []         # recorded text (class-level list, see NOTE above)
    iscleasspace = 0  # assigned by callers; never read inside this class

    def __init__(self):
        HTMLParser.__init__(self)
        # Set to 'a' once an anchor has been seen while irstart == 1.
        self.flag = None

    def handle_starttag(self, tag, attrs):
        """Record the href of an ``<a>`` tag when link collection is on."""
        if self.irstart != 1 or tag != 'a':
            return
        self.flag = 'a'
        # extend() mutates the existing list in place, exactly like the
        # per-item append it replaces.
        self.result.extend(
            value for name, value in attrs if name == 'href'
        )

    def handle_data(self, data):
        """Record a text chunk, stripped, when text collection is on."""
        if self.irend != 1:
            return
        text = data.strip()
        if text:
            self.data.append(text)

#=============================================================================
def getfdcinfo(fdcnumber):
    """Download the detail page of one project and return it as an fdcinfo.

    The page text is split into stripped, non-empty chunks by myHtmlParser.
    Whenever a chunk equals one of the known labels (developer company
    name, project name, project location), the chunk that follows it is
    stored in the matching fdcinfo field. If a label occurs several times
    the last occurrence wins; a label that is the very last chunk has no
    value and is ignored.

    Args:
        fdcnumber: project id as a string; it is concatenated into the URL.

    Returns:
        An fdcinfo whose ``number`` is *fdcnumber*. Fields whose label was
        not found keep the class default ''.

    Raises:
        urllib2.URLError: if the page cannot be fetched.
    """
    url = 'http://house.shunde.gov.cn/lp_detailinfo.jsp?id='+fdcnumber+'&type=1'
    print(url)

    parser = myHtmlParser()
    # myHtmlParser.data is a class-level list shared by every instance.
    # Bind a fresh list on this instance so text collected by earlier
    # parsers cannot leak into this lookup.
    parser.data = []
    parser.irend = 1
    parser.iscleasspace = 1

    response = urllib2.urlopen(url)
    try:
        parser.feed(response.read())
    finally:
        # Release the connection even when reading or parsing fails.
        response.close()
    parser.close()
    texts = parser.data

    # Page label -> fdcinfo attribute that receives the following chunk.
    label_to_attr = {
        '开发企业名称': 'companyname',
        '项目名称': 'objname',
        '项目坐落': 'pos',
    }

    fif = fdcinfo()
    # Pair every chunk with its successor; the final chunk has no successor
    # and therefore can never be treated as a label.
    for label, value in zip(texts, texts[1:]):
        attr = label_to_attr.get(label)
        if attr is not None:
            setattr(fif, attr, value)
    fif.number = fdcnumber

    print('开发企业名称'+fif.companyname)
    print('项目名称'+fif.objname)
    print('项目坐落'+fif.pos)

    return fif



def getfdcsellinfo(fdcnumber):
    """Download the sales page of one project and return its table as a dict.

    The page text is split into stripped, non-empty chunks by myHtmlParser.
    The table is the run of chunks from the last occurrence of the
    house-usage header cell up to (not including) the last occurrence of
    the units footnote. The first 10 chunks of that run are the column
    headers; the remaining chunks are read as rows of 10 cells whose first
    cell is the row label.

    A row labelled with the bare "area" text gets the first cell of the
    row above prepended to its label.

    Args:
        fdcnumber: project id as a string; it is concatenated into the URL.

    Returns:
        ``{column header: {row label: cell text}}`` for columns 2-10, plus
        ``'id': fdcnumber``. When the markers are missing or fewer than 10
        header cells are found, only the ``'id'`` entry is returned.

    Raises:
        urllib2.URLError: if the page cannot be fetched.
    """
    url = 'http://house.shunde.gov.cn/lp_xs_detailinfo.jsp?id='+fdcnumber+'&type=1'
    print(url)

    parser = myHtmlParser()
    # myHtmlParser.data is a class-level list shared by every instance.
    # Bind a fresh list on this instance so text collected by earlier
    # parsers cannot leak into this table.
    parser.data = []
    parser.irend = 1

    response = urllib2.urlopen(url)
    try:
        parser.feed(response.read())
    finally:
        # Release the connection even when reading or parsing fails.
        response.close()
    parser.close()
    texts = parser.data

    # Locate the table. Chunks are compared as UTF-8 byte strings (the
    # encoding this file is saved in), so a chunk that is not valid UTF-8
    # simply does not match instead of raising UnicodeDecodeError.
    table_start_label = '房屋用途'
    table_end_label = '面积单位：平方米 金额单位：万元 均价单位：元/平方米'
    start = end = 0
    for position, text in enumerate(texts):
        if text == table_start_label:
            start = position
        if text == table_end_label:
            end = position
    cells = texts[start:end]  # empty when the markers are absent/misordered

    column_count = 10
    resultDic = {}
    headers = cells[:column_count]
    if len(headers) == column_count:
        rowkey = ''
        for offset in range(column_count, len(cells)):
            column = offset % column_count
            value = cells[offset]
            if column == 0:
                # First cell of a row is the row label.
                rowkey = value
                if rowkey == '面积':
                    # Qualify a bare "area" label with the label cell of
                    # the row directly above it.
                    rowkey = cells[offset - column_count] + rowkey
            else:
                resultDic.setdefault(headers[column], {})[rowkey] = value.strip()

    resultDic['id'] = fdcnumber
    return resultDic






def getPageList(pcount, max_retries=3):
    """Collect every ``<a href>`` value from listing pages 1..pcount.

    Each page is requested with a POST carrying the list id and the page
    number, using a fresh cookie jar per attempt. The body is decoded as
    gb2312 (undecodable bytes dropped), re-encoded as UTF-8, printed, and
    parsed with myHtmlParser.

    A page whose request fails is retried up to *max_retries* times and
    then skipped, so one unreachable page cannot stall the run forever.

    Args:
        pcount: number of the last page to fetch; pages start at 1.
        max_retries: attempts per page before it is skipped. Values below
            1 are treated as 1.

    Returns:
        A new list with the href values of all fetched pages, in page
        order. Empty when *pcount* is below 1.
    """
    allresults = []
    attempts_allowed = max(1, max_retries)
    indx = 1
    while indx <= pcount:
        url = 'http://www.yfci.gov.cn/HousePresell/user_kfs_old.aspx?lid=b2fe0e00-f601-4748-9b9e-1475a3ef0085&page='+str(indx)
        print("浏览网页:"+url)
        values = {
            'lid': 'b2fe0e00-f601-4748-9b9e-1475a3ef0085',
            'p': str(indx),
        }
        data = urllib.urlencode(values)

        the_page = None
        for attempt in range(attempts_allowed):
            cookie = cookielib.CookieJar()
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
            req = urllib2.Request(url, data)
            req.add_header('Host', 'www.yfci.gov.cn')
            req.add_header('Referer', 'http://www.yfci.gov.cn/HousePresell/user_kfs.aspx?key=&kind=&LID=b2fe0e00-f601-4748-9b9e-1475a3ef0085&page=2')
            req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0')
            try:
                response = opener.open(req)
                try:
                    the_page = response.read()
                finally:
                    response.close()
            except Exception as err:
                # Exception (not a bare except) keeps Ctrl+C working.
                print('request failed (attempt %d of %d): %r'
                      % (attempt + 1, attempts_allowed, err))
            else:
                break

        # Always advance, whether the page was fetched or given up on.
        indx += 1
        if the_page is None:
            continue

        the_page = unicode(the_page, "gb2312", 'ignore').encode("utf8")
        # Raw page dump kept from the original for diagnostics.
        print(the_page)

        parser = myHtmlParser()
        # myHtmlParser.result is a class-level list shared by every
        # instance; bind a fresh list so only this page's links are read.
        parser.result = []
        parser.irstart = 1
        parser.feed(the_page)
        parser.close()
        allresults.extend(parser.result)
    return allresults

# Script body: everything below runs as soon as the module is loaded.
print('=============')

#==========================================================================
# Gather the links found on listing pages 1-10 and report how many there are.
allfdcnumbers = getPageList(10)
print('================:%d' % len(allfdcnumbers))
#==========================================================================
# Statistics
#==========================================================================
